package com.shi.desktop.indexer;

import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.OutputStreamWriter;

import org.pdfbox.pdmodel.PDDocument;
import org.pdfbox.util.PDFTextStripper;

/**
 * Small command-line harness that extracts the text of a PDF file with
 * PDFBox's {@link PDFTextStripper} and prints it to standard output.
 */
public class TestPDF {

	/** PDF that is processed when no path is given on the command line. */
	private static final String DEFAULT_PDF = "d:\\testdocs\\2005011801.pdf";

	/**
	 * Charset used both to encode the stripper output and to decode it again.
	 * Using one explicit charset keeps the round trip lossless regardless of
	 * the platform default encoding.
	 */
	private static final String TEXT_ENCODING = "UTF-8";

	/**
	 * Extracts and prints the text of a PDF file.
	 *
	 * @param args optional; when present, {@code args[0]} is the path of the
	 *            PDF to read, otherwise {@link #DEFAULT_PDF} is used
	 * @throws Exception declared for compatibility with {@link #getTxt(File)}
	 */
	public static void main(String[] args) throws Exception {
		String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PDF;
		File file = new File(path);
		String content = getTxt(file);
		System.out.println(content);
	}

	/**
	 * Returns the text content of the given PDF file.
	 * <p>
	 * Extraction is best-effort: if loading or stripping the document raises
	 * an {@link Exception}, its stack trace is printed and an empty string is
	 * returned. {@link Error}s are not intercepted and propagate to the caller.
	 * The document is always closed once it has been loaded, even when
	 * extraction fails.
	 *
	 * @param f the PDF file to read
	 * @return the extracted text, or an empty string if extraction failed
	 * @throws Exception retained in the signature for backward compatibility;
	 *             extraction failures are reported via the return value
	 */
	public static String getTxt(File f) throws Exception {
		String ts = "";
		PDDocument pdfdocument = null;
		try {
			pdfdocument = PDDocument.load(f);
			ByteArrayOutputStream out = new ByteArrayOutputStream();
			OutputStreamWriter writer = new OutputStreamWriter(out, TEXT_ENCODING);
			PDFTextStripper stripper = new PDFTextStripper();
			stripper.writeText(pdfdocument.getDocument(), writer);
			// Closing the writer flushes any buffered characters into 'out'
			// before the bytes are read back.
			writer.close();
			byte[] contents = out.toByteArray();
			ts = new String(contents, TEXT_ENCODING);
			System.out.println(f.getName() + "length is:" + contents.length
					+ "\n");
		} catch (Exception e) {
			e.printStackTrace();
		} finally {
			// Release the document even when extraction failed part-way.
			if (pdfdocument != null) {
				try {
					pdfdocument.close();
				} catch (Exception closeFailure) {
					// A failed close must not discard text already extracted.
					closeFailure.printStackTrace();
				}
			}
		}
		// Returning here, rather than from inside 'finally', avoids silently
		// discarding Errors thrown while processing.
		return ts;
	}
}
